library(ggplot2)
library(reshape2)
library(dplyr)
library(tidyr)
library(GGally)
library(grid)
"%&%" = function(a,b) paste(a,b,sep="")
source('/Volumes/im-lab/nas40t2/hwheeler/PrediXcan-Paper/scripts/Heather/make-figures/multiplot.R')
my.dir <- '/Volumes/im-lab/nas40t2/hwheeler/cross-tissue/'
fig.dir <- '~/GitHub/GenArch/GenArchPaper/Figures/'
DGN-WB joint heritability. Local h2 is estimated with SNPs within 1 Mb of each gene. distal h2 is estimated with SNPs that are eQTLs in the Framingham Heart Study on other chromosomes (FDR < 0.05).
otherfile<-my.dir %&% 'expArch_DGN-WB_imputedGTs/DGN-WB.h2.all.models_FHSfdr0.05.all.Chr1-22_globalOtherChr.2015-03-18.txt'
fdrother<-read.table(otherfile,header=T) ##FHS eQTLs w/fdr<0.05 on non-gene chromosomes used to define global GRM
##Plot FDR based results
a<-ggplot(fdrother,aes(x=loc.jt.h2,y=glo.jt.h2)) + geom_point(cex=0.8) + geom_abline(intercept=1, slope=-1,color='red') + xlab(expression("local h"^2)) + ylab(expression("distal h"^2)) + coord_cartesian(ylim=c(-0.05,1.05),xlim=c(-0.05,1.05)) + theme(axis.text=element_text(size=16),axis.title=element_text(size=18,face="bold"))
##plot joint h2 estimates
local <- fdrother %>% select(loc.jt.h2,loc.jt.se) %>% arrange(loc.jt.h2)
names(local) = c('h2','se')
data <- local %>% mutate(ymin = pmax(0, h2 - 2 * se), ymax = pmin(1, h2 + 2 * se) )
data <- data[complete.cases(data),]
cigt0 <- data$ymin>0
table(cigt0)
## cigt0
## FALSE TRUE
## 4974 5989
ptrue<-round(table(cigt0)/sum(table(cigt0)),3)*100
ptrue
## cigt0
## FALSE TRUE
## 45.4 54.6
meanh2<-round(mean(data$h2),3)
meanh2
## [1] 0.13
data <- mutate(data,nonzeroCI=cigt0,position=1:nrow(data))
my_grob = grobTree(textGrob(substitute(paste("mean ", h^2, " = ", m),list(m=meanh2)), x=0.05, y=0.60, hjust=0,gp=gpar(fontsize=14)))
my_grob2 = grobTree(textGrob(substitute(paste("percent TRUE = ", m),list(m=ptrue[2])), x=0.05, y=0.66, hjust=0,gp=gpar(fontsize=14)))
b<-ggplot(data,aes(x=position,y=h2,ymin=ymin, ymax=ymax, color=nonzeroCI) ) + geom_pointrange(col='gray')+geom_point()+ylab(expression("local h"^2)) + xlab(expression("genes ordered by local h"^2))+coord_cartesian(ylim=c(-0.05,1.05))+theme(axis.text=element_text(size=16),axis.title=element_text(size=18,face="bold"),legend.justification=c(0,1),legend.position=c(0,1))+annotation_custom(my_grob)+annotation_custom(my_grob2)
global <- fdrother %>% select(glo.jt.h2,glo.jt.se) %>% arrange(glo.jt.h2)
names(global) = c('h2','se')
data <- global %>% mutate(ymin = pmax(0, h2 - 2 * se), ymax = pmin(1, h2 + 2 * se) )
data <- data[complete.cases(data),]
cigt0 <- data$ymin>0
table(cigt0)
## cigt0
## FALSE TRUE
## 10505 458
ptrue<-round(table(cigt0)/sum(table(cigt0)),3)*100
ptrue
## cigt0
## FALSE TRUE
## 95.8 4.2
meanh2<-round(mean(data$h2),3)
meanh2
## [1] 0.076
data <- mutate(data,nonzeroCI=cigt0,position=1:nrow(data))
my_grob = grobTree(textGrob(substitute(paste("mean ", h^2, " = ", m),list(m=meanh2)), x=0.05, y=0.60, hjust=0,gp=gpar(fontsize=14)))
my_grob2 = grobTree(textGrob(substitute(paste("percent TRUE = ", m),list(m=ptrue[2])), x=0.05, y=0.66, hjust=0,gp=gpar(fontsize=14)))
c<-ggplot(data,aes(x=position,y=h2,ymin=ymin, ymax=ymax, color=nonzeroCI) ) + geom_pointrange(col='gray')+geom_point()+ylab(expression("distal h"^2)) + xlab(expression("genes ordered by distal h"^2))+coord_cartesian(ylim=c(-0.05,1.05))+theme(axis.text=element_text(size=16),axis.title=element_text(size=18,face="bold"),legend.justification=c(0,1),legend.position=c(0,1))+annotation_custom(my_grob)+annotation_custom(my_grob2)
multiplot(a+ggtitle('A')+theme(plot.title=element_text(hjust=0,face="bold")),b+ggtitle('B')+theme(plot.title=element_text(hjust=0,face="bold")),c+ggtitle('C')+theme(plot.title=element_text(hjust=0,face="bold")),cols=1)
tiff(filename=fig.dir %&% "Fig1.tiff",width=360,height=960)
multiplot(a+ggtitle('A')+theme(plot.title=element_text(hjust=0,face="bold")),b+ggtitle('B')+theme(plot.title=element_text(hjust=0,face="bold")),c+ggtitle('C')+theme(plot.title=element_text(hjust=0,face="bold")),cols=1)
dev.off()
## quartz_off_screen
## 2
png(filename=fig.dir %&% "Fig1.png",width=360,height=960)
multiplot(a+ggtitle('A')+theme(plot.title=element_text(hjust=0,face="bold")),b+ggtitle('B')+theme(plot.title=element_text(hjust=0,face="bold")),c+ggtitle('C')+theme(plot.title=element_text(hjust=0,face="bold")),cols=1)
dev.off()
## quartz_off_screen
## 2
DGN-WB marginal heritability. Local h2 is estimated with SNPs within 1 Mb of each gene. distal h2 is estimated with SNPs that are eQTLs in the Framingham Heart Study on other chromosomes (FDR < 0.05).
##Plot FDR based results
ggplot(fdrother,aes(x=local.h2,y=global.h2)) + geom_point(cex=0.8) + geom_abline(intercept=1, slope=-1,color='red') + xlab(expression("DGN marginal local h"^2)) + ylab(expression("DGN marginal distal h"^2)) + coord_cartesian(ylim=c(-0.05,1.05),xlim=c(-0.05,1.05)) + theme(axis.text=element_text(size=16),axis.title=element_text(size=18,face="bold"))
DGN-WB joint heritability with known trans-eQTLs. Local h2 is estimated with SNPs within 1 Mb of each gene. Known trans h2 is estimated with SNPs that are trans-eQTLs in the Framingham Heart Study for each gene (FDR < 0.05).
transfile<-my.dir %&% 'expArch_DGN-WB_imputedGTs/DGN-WB.h2.all.models_FHSfdr0.05.all.Chr1-22_transForGene.2015-03-20.txt'
fdrtrans<-read.table(transfile,header=T) ##FHS trans-eQTLs for gene w/fdr<0.05 used to define Known trans GRM
##Plot FDR based results
a<-ggplot(fdrtrans,aes(x=loc.jt.h2,y=trans.jt.h2)) + geom_point(cex=0.8) + geom_abline(intercept=1, slope=-1,color='red') + xlab(expression("local h"^2)) + ylab(expression("known trans h"^2)) + coord_cartesian(ylim=c(-0.05,1.05),xlim=c(-0.05,1.05)) + theme(axis.text=element_text(size=16),axis.title=element_text(size=18,face="bold"))
##plot joint h2 estimates
local <- fdrtrans %>% select(loc.jt.h2,loc.jt.se) %>% arrange(loc.jt.h2)
names(local) = c('h2','se')
data <- local %>% mutate(ymin = pmax(0, h2 - 2 * se), ymax = pmin(1, h2 + 2 * se) )
data <- data[complete.cases(data),]
cigt0 <- data$ymin>0
table(cigt0)
## cigt0
## FALSE TRUE
## 462 732
ptrue<-round(table(cigt0)/sum(table(cigt0)),3)*100
ptrue
## cigt0
## FALSE TRUE
## 38.7 61.3
meanh2<-round(mean(data$h2),3)
meanh2
## [1] 0.133
data <- mutate(data,nonzeroCI=cigt0,position=1:nrow(data))
my_grob = grobTree(textGrob(substitute(paste("mean ", h^2, " = ", m),list(m=meanh2)), x=0.05, y=0.60, hjust=0,gp=gpar(fontsize=14)))
my_grob2 = grobTree(textGrob(substitute(paste("percent TRUE = ", m),list(m=ptrue[2])), x=0.05, y=0.66, hjust=0,gp=gpar(fontsize=14)))
b<-ggplot(data,aes(x=position,y=h2,ymin=ymin, ymax=ymax,color=nonzeroCI) ) + geom_pointrange(col='gray')+geom_point()+ylab(expression("local h"^2)) + xlab(expression("genes ordered by local h"^2))+coord_cartesian(ylim=c(-0.05,1.05),xlim=c(-30,1280))+ theme(axis.text=element_text(size=16),axis.title=element_text(size=18,face="bold"),legend.justification=c(0,1), legend.position=c(0,1))+annotation_custom(my_grob)+annotation_custom(my_grob2)
global <- fdrtrans %>% select(trans.jt.h2,trans.jt.se) %>% arrange(trans.jt.h2)
names(global) = c('h2','se')
data <- global %>% mutate(ymin = pmax(0, h2 - 2 * se), ymax = pmin(1, h2 + 2 * se) )
data <- data[complete.cases(data),]
cigt0 <- data$ymin>0
table(cigt0)
## cigt0
## FALSE TRUE
## 1144 50
ptrue<-round(table(cigt0)/sum(table(cigt0)),3)*100
ptrue
## cigt0
## FALSE TRUE
## 95.8 4.2
meanh2<-round(mean(data$h2),3)
meanh2
## [1] 0.021
data <- mutate(data,nonzeroCI=cigt0,position=1:nrow(data))
my_grob = grobTree(textGrob(substitute(paste("mean ", h^2, " = ", m),list(m=meanh2)), x=0.05, y=0.60, hjust=0,gp=gpar(fontsize=14)))
my_grob2 = grobTree(textGrob(substitute(paste("percent TRUE = ", m),list(m=ptrue[2])), x=0.05, y=0.66, hjust=0,gp=gpar(fontsize=14)))
c<-ggplot(data,aes(x=position,y=h2,ymin=ymin, ymax=ymax,color=nonzeroCI) ) + geom_pointrange(col='gray')+geom_point()+ylab(expression("known trans h"^2)) + xlab(expression("genes ordered by known trans h"^2))+coord_cartesian(ylim=c(-0.05,1.05),xlim=c(-30,1280))+ theme(axis.text=element_text(size=16),axis.title=element_text(size=18,face="bold"),legend.justification=c(0,1), legend.position=c(0,1))+annotation_custom(my_grob)+annotation_custom(my_grob2)
multiplot(a+ggtitle('A')+theme(plot.title=element_text(hjust=0,face="bold")),b+ggtitle('B')+theme(plot.title=element_text(hjust=0,face="bold")),c+ggtitle('C')+theme(plot.title=element_text(hjust=0,face="bold")),cols=1)
tiff(filename=fig.dir %&% "Fig2.tiff",width=360,height=960)
multiplot(a+ggtitle('A')+theme(plot.title=element_text(hjust=0,face="bold")),b+ggtitle('B')+theme(plot.title=element_text(hjust=0,face="bold")),c+ggtitle('C')+theme(plot.title=element_text(hjust=0,face="bold")),cols=1)
dev.off()
## quartz_off_screen
## 2
png(filename=fig.dir %&% "Fig2.png",width=360,height=960)
multiplot(a+ggtitle('A')+theme(plot.title=element_text(hjust=0,face="bold")),b+ggtitle('B')+theme(plot.title=element_text(hjust=0,face="bold")),c+ggtitle('C')+theme(plot.title=element_text(hjust=0,face="bold")),cols=1)
dev.off()
## quartz_off_screen
## 2
Polygenic v. sparse by elastic net.
data<-read.table(my.dir %&% 'working_DGN-WB_exp_10-foldCV_1-reps_elasticNet_eachAlphaR2_hapmap2snps_chr22_2015-01-21.txt',header=T,check.names=F)
ngenes<-dim(data)[1]
print("Elastic Net DGN-WB chr22 (" %&% ngenes %&% " genes)")
## [1] "Elastic Net DGN-WB chr22 (341 genes)"
data_long<-melt(data,by=gene)
## Using gene as id variables
a <- ggplot(data_long, aes(x = as.numeric(levels(variable))[variable] , y = value), group=gene) + geom_line(lwd=0.5,show_guide = FALSE,linetype=1) + aes(color = gene) + xlab(expression(paste("elastic net mixing parameter (",alpha, ")"))) + ylab(expression(paste("10-fold cross-validation R"^2))) + theme_gray(base_size = 20) + coord_cartesian(ylim=c(0.3,1),xlim=c(-0.02,1.02))+ geom_point(show_guide = FALSE)
print(a)
data2 <- select(data,gene,2:3,12,22)
gdata <- gather(data2,alpha,R2,2:4)
b<- ggplot(gdata, aes(y = R2 , x = gdata[,2], group=alpha, color=alpha)) + geom_point(show_guide = TRUE) + ylab(expression(paste("alpha R"^2))) + xlab(expression(paste("LASSO (",alpha,"=1) R"^2))) + geom_smooth(method = "lm")+geom_abline(intercept=0, slope=1,color='black')+ coord_cartesian(ylim=c(-0.05,1.05),xlim=c(-0.05,1.05)) + theme_gray(base_size = 20) + theme(legend.justification=c(0,1), legend.position=c(0,1))
blandaltman<-ggplot(gdata, aes(x = gdata[,2] , y = gdata[,2]-gdata[,4], group=alpha, color=alpha)) + geom_point(show_guide = TRUE) + ylab(expression(paste("R"^2, " difference (LASSO - alpha)"))) + xlab(expression(paste("R"^2, " (LASSO)"))) + theme_gray(base_size = 20) + theme(legend.justification=c(0,1), legend.position=c(0,1))
blandaltman
###what is the point below zero?
newgdata<-mutate(gdata, diff=`1`-R2) %>% arrange(diff)
head(newgdata)
## gene 1 alpha R2 diff
## 1 GTSE1 3.054340e-07 0 0.03915528 -0.039154977
## 2 ARFGAP3 5.344014e-03 0 0.01668370 -0.011339686
## 3 USP18 5.804827e-08 0 0.01030295 -0.010302887
## 4 C22orf40 4.519708e-04 0 0.01048670 -0.010034727
## 5 THAP7 1.908300e-05 0 0.00935239 -0.009333307
## 6 NFAM1 6.489269e-03 0 0.01563154 -0.009142272
filter(newgdata,gene=='GTSE1')
## gene 1 alpha R2 diff
## 1 GTSE1 3.05434e-07 0 3.915528e-02 -3.915498e-02
## 2 GTSE1 3.05434e-07 0.05 8.159165e-04 -8.156110e-04
## 3 GTSE1 3.05434e-07 0.5 2.353392e-06 -2.047958e-06
##G-2 And S-Phase Expressed 1, The protein encoded by this gene is only expressed in the S and G2 phases of the cell cycle, where it colocalizes with cytoplasmic tubulin and microtubules. In response to DNA damage, the encoded protein accumulates in the nucleus and binds the tumor suppressor protein p53, shuttling it out of the nucleus and repressing its ability to induce apoptosis.
png(filename=fig.dir %&% "DGNchr22blandAltmanEN.png",width=480,height=480)
blandaltman
dev.off()
## quartz_off_screen
## 2
#old fig3
multiplot(a+ggtitle('A')+theme(plot.title=element_text(hjust=0,face="bold")),b+ggtitle('B')+theme(plot.title=element_text(hjust=0,face="bold")),cols=2)
tiff(filename=fig.dir %&% "Fig3.tiff",width=960,height=480)
multiplot(a+ggtitle('A')+theme(plot.title=element_text(hjust=0,face="bold")),blandaltman+ggtitle('B')+theme(plot.title=element_text(hjust=0,face="bold")),cols=2)
dev.off()
## quartz_off_screen
## 2
png(filename=fig.dir %&% "Fig3.png",width=960,height=480)
multiplot(a+ggtitle('A')+theme(plot.title=element_text(hjust=0,face="bold")),blandaltman+ggtitle('B')+theme(plot.title=element_text(hjust=0,face="bold")),cols=2)
dev.off()
## quartz_off_screen
## 2
GTEx tissue-wide heritability. Local h2 is estimated with SNPs within 1 Mb of each gene.
h2TW<-read.table(my.dir %&% "GTEx_Tissue-Wide_local_h2.txt",header=T)
seTW<-read.table(my.dir %&% "GTEx_Tissue-Wide_local_se.txt",header=T)
###make ordered point+CI h2 plots
gTW_h2 <- gather(h2TW,"Tissue.h2","TissueWide.h2",2:11)
gTW_se <- gather(seTW,"Tissue.se","TissueWide.se",2:11)
gTW_h2_se <- cbind(gTW_h2,gTW_se[,3])
colnames(gTW_h2_se) <- c("ensid","Tissue","h2","se")
ngenes<-nrow(gTW_h2_se)/length(unique(gTW_h2_se$Tissue))
gTW_h2_se <- gTW_h2_se %>% mutate(ymin = pmax(0, h2 - 2 * se), ymax = pmin(1, h2 + 2 * se) ) %>% arrange(Tissue,h2) %>% mutate(place=rep(1:ngenes,10),nonzeroCI=ymin>0)
fig4<-ggplot(gTW_h2_se,aes(x=place,y=h2,ymin=ymin, ymax=ymax, color=nonzeroCI) ) + facet_wrap(~Tissue,ncol=2) + geom_pointrange(col='gray')+geom_point()+theme(legend.justification=c(0,1),legend.position=c(0,1))+ylab(expression("local h"^2)) + xlab(expression("genes ordered by local h"^2))+coord_cartesian(ylim=c(-0.05,1.05))+ theme(axis.text=element_text(size=16),axis.title=element_text(size=18,face="bold"),strip.text.x = element_text(size = 18),legend.text=element_text(size=14),legend.title=element_text(size=14))
###calc % nonzero for each tissue TISSUE-WIDE
### ADD to plot legend
pvec<-vector()
h2vec<-vector()
a<-gTW_h2_se %>% select(ensid,Tissue,nonzeroCI) %>% spread(Tissue,nonzeroCI)
for(i in 2:11){
tis<-colnames(a)[i]
cat("\n\n---",tis,"---\n")
print(table(a[,i]))
per <- signif(table(a[,i])/sum(table(a[,i])),3)*100
print(per)
pvec <- c(pvec,per[2])
}
##
##
## --- CrossTissue ---
##
## FALSE TRUE
## 14069 2938
##
## FALSE TRUE
## 82.7 17.3
##
##
## --- AdiposeSubcutaneous ---
##
## FALSE TRUE
## 15880 1127
##
## FALSE TRUE
## 93.40 6.63
##
##
## --- ArteryTibial ---
##
## FALSE TRUE
## 15836 1171
##
## FALSE TRUE
## 93.10 6.89
##
##
## --- HeartLeftVentricle ---
##
## FALSE TRUE
## 16258 749
##
## FALSE TRUE
## 95.6 4.4
##
##
## --- Lung ---
##
## FALSE TRUE
## 16078 929
##
## FALSE TRUE
## 94.50 5.46
##
##
## --- MuscleSkeletal ---
##
## FALSE TRUE
## 15944 1063
##
## FALSE TRUE
## 93.70 6.25
##
##
## --- NerveTibial ---
##
## FALSE TRUE
## 15541 1466
##
## FALSE TRUE
## 91.40 8.62
##
##
## --- SkinSunExposedLowerleg ---
##
## FALSE TRUE
## 15809 1198
##
## FALSE TRUE
## 93.00 7.04
##
##
## --- Thyroid ---
##
## FALSE TRUE
## 15680 1327
##
## FALSE TRUE
## 92.2 7.8
##
##
## --- WholeBlood ---
##
## FALSE TRUE
## 16062 945
##
## FALSE TRUE
## 94.40 5.56
###calc mean h2 for each tissue
a<-gTW_h2_se %>% select(ensid,Tissue,h2) %>% spread(Tissue,h2)
for(i in 2:11){
tis<-colnames(a)[i]
meanh2 <- signif(mean(a[,i]),3)
cat(tis,"mean h2:",meanh2,"\n")
h2vec <- c(h2vec,meanh2)
}
## CrossTissue mean h2: 0.057
## AdiposeSubcutaneous mean h2: 0.0338
## ArteryTibial mean h2: 0.0358
## HeartLeftVentricle mean h2: 0.0383
## Lung mean h2: 0.0325
## MuscleSkeletal mean h2: 0.0279
## NerveTibial mean h2: 0.044
## SkinSunExposedLowerleg mean h2: 0.0351
## Thyroid mean h2: 0.0392
## WholeBlood mean h2: 0.0265
ann_text <- data.frame( h2 = rep(0.78,10), place = rep(5000,10), percent= pvec, mean_h2 = h2vec, Tissue = factor(colnames(a)[2:11]), ymin=rep(0.9,10), ymax=rep(0.9,10), nonzeroCI=rep(NA,10), se=rep(0.9,10))
fig4.2<-fig4+geom_text(data=ann_text,aes(label=paste("mean_h^2 ==",mean_h2,sep="")),color="black",show_guide=F,parse=T,hjust=0,size=5)
ann_text <- data.frame( h2 = rep(0.9,10), place = rep(5000,10), percent= pvec, mean_h2 = h2vec, Tissue = factor(colnames(a)[2:11]), ymin=rep(0.9,10), ymax=rep(0.9,10), nonzeroCI=rep(NA,10), se=rep(0.9,10))
fig4<-fig4.2+geom_text(data=ann_text,aes(label=paste("percent_TRUE ==",percent,sep="")),color="black",show_guide=F,parse=T,hjust=0,size=5)
fig4
tiff(filename=fig.dir %&% "Fig4.tiff",width=720,height=960)
fig4
dev.off()
## quartz_off_screen
## 2
png(filename=fig.dir %&% "Fig4.png",width=720,height=960)
fig4
dev.off()
## quartz_off_screen
## 2
GTEx cross-tissue and tissue-wide h2 (A) and SE (B).
h2TW<-read.table(my.dir %&% "GTEx_Tissue-Wide_local_h2.txt",header=T)
seTW<-read.table(my.dir %&% "GTEx_Tissue-Wide_local_se.txt",header=T)
gh2_TW<-gather(h2TW,"CrossTissue","Tissue",3:11)
colnames(gh2_TW) <- c('ensid','CrossTissue','TissueName','Tissue')
fig5a<-ggplot(gh2_TW,aes(x=Tissue,y=CrossTissue)) +facet_wrap(~TissueName,scales="fixed",ncol=3) + geom_point(alpha=1/10) + geom_abline(intercept=0, slope=1,color='red') + ylab(expression('Cross-Tissue h'^2)) + xlab(expression('Tissue-Wide h'^2)) + ggtitle("A")+ coord_cartesian(ylim=c(-0.1,1.1),xlim=c(-0.1,1.1)) + theme(plot.title = element_text(hjust = 0,face="bold"))
gse_TW<-gather(seTW,"CrossTissue","Tissue",3:11)
colnames(gse_TW) <- c('ensid','CrossTissue','TissueName','Tissue')
fig5b<-ggplot(gse_TW,aes(x=Tissue,y=CrossTissue)) +facet_wrap(~TissueName,scales="fixed",ncol=3) + geom_point(alpha=1/10) + geom_abline(intercept=0, slope=1,color='red') + ylab('Cross-Tissue SE') + xlab('Tissue-Wide SE') + ggtitle("B") + coord_cartesian(ylim=c(-0.01,0.16),xlim=c(-0.01,0.16))+ theme(plot.title = element_text(hjust = 0,face="bold"))
multiplot(fig5a,fig5b)
tiff(filename=fig.dir %&% "Fig5.tiff",width=480,height=960)
multiplot(fig5a,fig5b)
dev.off()
## quartz_off_screen
## 2
png(filename=fig.dir %&% "Fig5.png",width=480,height=960)
multiplot(fig5a,fig5b)
dev.off()
## quartz_off_screen
## 2
chr22<-read.table(my.dir %&% "gtex-annot/gencode.v18.genes.patched_contigs.summary.protein.chr22")
gtexEN<-read.table(my.dir %&% "GTEx_tissue-wide_elasticNet_for_ggplot2_2015-05-15.txt",header=T)
chr22EN <- filter(gtexEN,ensid %in% chr22$V5)
ngenes <- length(unique(chr22EN$ensid))
a<-ggplot(chr22EN, aes(x = alpha , y = R2), group=ensid) + facet_wrap(~tissue,ncol=3) + geom_point(show_guide = FALSE) + geom_line(lwd = 0.5, show_guide = FALSE) + aes(color = ensid) + xlab("alpha") + ylab("R2") + ggtitle("GTEx tissue-wide chr22 (" %&% ngenes %&% " genes)")
a
png(filename="GTEx-TW-EN.png")
a
dev.off()
## quartz_off_screen
## 2
s_gtexEN<-spread(chr22EN,alpha,R2)
g_gtexEN<-gather(s_gtexEN,alpha,R2,3:5) %>% arrange(tissue)
b<-ggplot(g_gtexEN, aes(x = R2 , y = g_gtexEN[,3], group=alpha, color=alpha)) + facet_wrap(~tissue,ncol=3) + geom_point(show_guide = TRUE) + xlab("alpha R2") + ylab("lasso R2")+ geom_smooth(method = "lm")+geom_abline(intercept=0, slope=1,color='black')+ coord_cartesian(ylim=c(-0.05,1.05),xlim=c(-0.05,1.05))+ ggtitle("GTEx tissue-wide chr22 (" %&% ngenes %&% " genes)")
b
png(filename="GTEx-TW-EN_lassoR2_v_alphaR2.png")
b
dev.off()
## quartz_off_screen
## 2
ngenesall <- length(unique(gtexEN$ensid))
s_gtexEN<-spread(gtexEN,alpha,R2)
g_gtexEN<-gather(s_gtexEN,alpha,R2,3:5) %>% arrange(tissue)
ggplot(g_gtexEN, aes(x = R2 , y = g_gtexEN[,3], group=alpha, color=alpha)) + facet_wrap(~tissue,ncol=3) + geom_point(show_guide = TRUE) + xlab("alpha R2") + ylab("lasso R2")+ geom_smooth(method = "lm")+geom_abline(intercept=0, slope=1,color='black')+ coord_cartesian(ylim=c(-0.05,1.05),xlim=c(-0.05,1.05))+ ggtitle("GTEx tissue-wide (" %&% ngenesall %&% " genes)")
###GTEx cross-tissue elastic net
cten<-read.table(my.dir %&% 'cross-tissue_exp_10-foldCV_elasticNet_R2_for_ggplot2.txt',header=T,check.names=F)
ngenesall <- length(unique(cten$gene))
g_cten<-gather(cten,alpha,R2,3:5)
a<-ggplot(g_cten, aes(y = `1` - R2, x = `1`, group=alpha, color=alpha)) + geom_point(show_guide = TRUE) + ylab(expression(paste("R"^2, " difference (LASSO - alpha)"))) + xlab(expression(paste("R"^2, " (LASSO)"))) +ggtitle("GTEx cross-tissue (" %&% ngenesall %&% " genes)")+theme_gray(18)
a
## Warning in loop_apply(n, do.ply): Removed 10623 rows containing missing
## values (geom_point).
png(filename="GTEx-cross-tissue-EN_lassoR2_v_alphaR2.png")
a
## Warning in loop_apply(n, do.ply): Removed 10623 rows containing missing
## values (geom_point).
dev.off()
## quartz_off_screen
## 2
###what are points below zero?
new_cten<-mutate(g_cten, diff=`1`-R2) %>% arrange(diff)
head(new_cten,n=10L)
## gene cross-tissue 1 alpha R2 diff
## 1 RBFADN cross-tissue 0.183801495 0.05 0.25759279 -0.07379129
## 2 GRAMD1B cross-tissue 0.004506654 0.05 0.05309162 -0.04858496
## 3 KCTD18 cross-tissue 0.003009104 0.05 0.05102096 -0.04801185
## 4 FAM101B cross-tissue 0.137451577 0.05 0.17350533 -0.03605376
## 5 WDR81 cross-tissue 0.045779349 0.05 0.08066177 -0.03488242
## 6 FICD cross-tissue 0.005234580 0.05 0.04010692 -0.03487234
## 7 FHIT cross-tissue 0.038865715 0.05 0.07169432 -0.03282860
## 8 NEIL2 cross-tissue 0.192899836 0.05 0.22568531 -0.03278547
## 9 CAPN8 cross-tissue 0.266590755 0.05 0.29804730 -0.03145654
## 10 RPS23 cross-tissue 0.091904576 0.05 0.12333971 -0.03143513
filter(new_cten,gene=='RBFADN')
## gene cross-tissue 1 alpha R2 diff
## 1 RBFADN cross-tissue 0.1838015 0.05 0.2575928 -0.0737912903
## 2 RBFADN cross-tissue 0.1838015 0.5 0.1973535 -0.0135520213
## 3 RBFADN cross-tissue 0.1838015 0.95 0.1841779 -0.0003764431
GTEx tissue-specific heritability. Local h2 is estimated with SNPs within 1 Mb of each gene.
h2TS<-read.table(my.dir %&% "GTEx_Tissue-Specific_local_h2.txt",header=T)
seTS<-read.table(my.dir %&% "GTEx_Tissue-Specific_local_se.txt",header=T)
###make ordered point+CI h2 plots
gTS_h2 <- gather(h2TS,"Tissue.h2","TissueSpecific.h2",2:11)
gTS_se <- gather(seTS,"Tissue.se","TissueSpecific.se",2:11)
gTS_h2_se <- cbind(gTS_h2,gTS_se[,3])
colnames(gTS_h2_se) <- c("ensid","Tissue","h2","se")
ngenes<-nrow(gTS_h2_se)/length(unique(gTS_h2_se$Tissue))
gTS_h2_se <- gTS_h2_se %>% mutate(ymin = pmax(0, h2 - 2 * se), ymax = pmin(1, h2 + 2 * se) ) %>% arrange(Tissue,h2) %>% mutate(place=rep(1:ngenes,10),nonzeroCI=ymin>0)
figS1<-ggplot(gTS_h2_se,aes(x=place,y=h2,ymin=ymin, ymax=ymax, color=nonzeroCI) ) + facet_wrap(~Tissue,ncol=2) + geom_pointrange(col='gray')+geom_point()+ theme(legend.justification=c(0,1), legend.position=c(0,1))+ylab(expression("local h"^2)) + xlab(expression("genes ordered by local h"^2))+ theme(axis.text=element_text(size=16),axis.title=element_text(size=18,face="bold"),strip.text.x = element_text(size = 18),legend.text=element_text(size=14),legend.title=element_text(size=14))+ coord_cartesian(ylim=c(0,1))
###calc % nonzero for each tissue TISSUE-WIDE
### ADD to plot legend
pvec<-vector()
h2vec<-vector()
a<-gTS_h2_se %>% select(ensid,Tissue,nonzeroCI) %>% spread(Tissue,nonzeroCI)
for(i in 2:11){
tis<-colnames(a)[i]
cat("\n\n---",tis,"---\n")
print(table(a[,i]))
per <- signif(table(a[,i])/sum(table(a[,i])),3)*100
print(per)
pvec <- c(pvec,per[2])
}
##
##
## --- CrossTissue ---
##
## FALSE TRUE
## 14069 2938
##
## FALSE TRUE
## 82.7 17.3
##
##
## --- AdiposeSubcutaneous ---
##
## FALSE TRUE
## 16800 207
##
## FALSE TRUE
## 98.80 1.22
##
##
## --- ArteryTibial ---
##
## FALSE TRUE
## 16701 306
##
## FALSE TRUE
## 98.2 1.8
##
##
## --- HeartLeftVentricle ---
##
## FALSE TRUE
## 16709 298
##
## FALSE TRUE
## 98.20 1.75
##
##
## --- Lung ---
##
## FALSE TRUE
## 16784 223
##
## FALSE TRUE
## 98.70 1.31
##
##
## --- MuscleSkeletal ---
##
## FALSE TRUE
## 16573 434
##
## FALSE TRUE
## 97.40 2.55
##
##
## --- NerveTibial ---
##
## FALSE TRUE
## 16628 379
##
## FALSE TRUE
## 97.80 2.23
##
##
## --- SkinSunExposedLowerleg ---
##
## FALSE TRUE
## 16558 449
##
## FALSE TRUE
## 97.40 2.64
##
##
## --- Thyroid ---
##
## FALSE TRUE
## 16565 442
##
## FALSE TRUE
## 97.4 2.6
##
##
## --- WholeBlood ---
##
## FALSE TRUE
## 16517 490
##
## FALSE TRUE
## 97.10 2.88
###calc mean h2 for each tissue
a<-gTS_h2_se %>% select(ensid,Tissue,h2) %>% spread(Tissue,h2)
for(i in 2:11){
tis<-colnames(a)[i]
meanh2 <- signif(mean(a[,i]),3)
cat(tis,"mean h2:",meanh2,"\n")
h2vec <- c(h2vec,meanh2)
}
## CrossTissue mean h2: 0.057
## AdiposeSubcutaneous mean h2: 0.0204
## ArteryTibial mean h2: 0.0249
## HeartLeftVentricle mean h2: 0.0323
## Lung mean h2: 0.0223
## MuscleSkeletal mean h2: 0.0214
## NerveTibial mean h2: 0.0281
## SkinSunExposedLowerleg mean h2: 0.0288
## Thyroid mean h2: 0.0265
## WholeBlood mean h2: 0.0261
ann_text <- data.frame( h2 = rep(0.78,10), place = rep(5000,10), percent= pvec, mean_h2 = h2vec, Tissue = factor(colnames(a)[2:11]), ymin=rep(0.9,10), ymax=rep(0.9,10), nonzeroCI=rep(NA,10), se=rep(0.9,10))
figS1.2<-figS1+geom_text(data=ann_text,aes(label=paste("mean_h^2 ==",mean_h2,sep="")),color="black",show_guide=F,parse=T,hjust=0,size=5)
ann_text <- data.frame( h2 = rep(0.9,10), place = rep(5000,10), percent= pvec, mean_h2 = h2vec, Tissue = factor(colnames(a)[2:11]), ymin=rep(0.9,10), ymax=rep(0.9,10), nonzeroCI=rep(NA,10), se=rep(0.9,10))
figS1<-figS1.2+geom_text(data=ann_text,aes(label=paste("percent_TRUE ==",percent,sep="")),color="black",show_guide=F,parse=T,hjust=0,size=5)
figS1
tiff(filename=fig.dir %&% "FigS1.tiff",width=720,height=960)
figS1
dev.off()
## quartz_off_screen
## 2
png(filename=fig.dir %&% "FigS1.png",width=720,height=960)
figS1
dev.off()
## quartz_off_screen
## 2
GTEx cross-tissue and tissue-specific h2 (A) and SE (B).
h2TS<-read.table(my.dir %&% "GTEx_Tissue-Specific_local_h2.txt",header=T)
seTS<-read.table(my.dir %&% "GTEx_Tissue-Specific_local_se.txt",header=T)
gh2_TS<-gather(h2TS,"CrossTissue","Tissue",3:11)
colnames(gh2_TS) <- c('ensid','CrossTissue','TissueName','Tissue')
figS2a<-ggplot(gh2_TS,aes(x=Tissue,y=CrossTissue)) +facet_wrap(~TissueName,scales="fixed",ncol=3) + geom_point(alpha=1/10) + geom_abline(intercept=0, slope=1,color='red') + ylab(expression('Cross-Tissue h'^2)) + xlab(expression('Tissue-Specific h'^2)) + ggtitle("A")+ coord_cartesian(ylim=c(-0.1,1.1),xlim=c(-0.1,1.1)) + theme(plot.title = element_text(hjust = 0,face="bold"))
gse_TS<-gather(seTS,"CrossTissue","Tissue",3:11)
colnames(gse_TS) <- c('ensid','CrossTissue','TissueName','Tissue')
figS2b<-ggplot(gse_TS,aes(x=Tissue,y=CrossTissue)) +facet_wrap(~TissueName,scales="fixed",ncol=3) + geom_point(alpha=1/10) + geom_abline(intercept=0, slope=1,color='red') + ylab('Cross-Tissue SE') + xlab('Tissue-Specific SE') + ggtitle("B") + coord_cartesian(ylim=c(-0.01,0.16),xlim=c(-0.01,0.16))+ theme(plot.title = element_text(hjust = 0,face="bold"))
multiplot(figS2a,figS2b)
tiff(filename=fig.dir %&% "FigS2.tiff",width=480,height=960)
multiplot(figS2a,figS2b)
dev.off()
## quartz_off_screen
## 2
png(filename=fig.dir %&% "FigS2.png",width=480,height=960)
multiplot(figS2a,figS2b)
dev.off()
## quartz_off_screen
## 2
[[ADD h2 vs. variance]]
se <- function(x) sqrt(var(x)/length(x))
h2 <- read.table('/Volumes/im-lab/nas40t2/hwheeler/PrediXcan_CV/cis.v.trans.prediction/DGN-WB.localGRM.h2.exp.2014-08-30.txt',header=T)
rawcounts <- read.table('/Volumes/im-lab/nas40t2/Data/Transcriptome/WB1K/data_used_for_eqtl_study/raw_counts.txt',header=T)
expmean<-data.frame(mean.exp=colMeans(rawcounts),se.exp=apply(rawcounts,2,se)) %>%mutate(gene=as.factor(colnames(rawcounts)))
all <- inner_join(expmean,h2,by='gene',copy=TRUE)
ngenes<-dim(all)[[1]]
a<-ggplot(all,aes(x=log10(mean.exp),y=h2)) + geom_point() + xlab(expression(paste("log"[10],"(mean expression raw counts)"))) + ylab(expression("Local h"^2)) + ggtitle("DGN-WB (" %&% ngenes %&% " genes)") + theme_bw(20)+ geom_smooth(method = "lm")
a
summary(lm(h2~mean.exp,all))
##
## Call:
## lm(formula = h2 ~ mean.exp, data = all)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.12348 -0.10470 -0.06199 0.04329 0.81936
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.235e-01 1.380e-03 89.488 < 2e-16 ***
## mean.exp -3.825e-07 1.085e-07 -3.525 0.000425 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1558 on 13623 degrees of freedom
## Multiple R-squared: 0.0009114, Adjusted R-squared: 0.000838
## F-statistic: 12.43 on 1 and 13623 DF, p-value: 0.0004246
b<-ggplot(all,aes(x=log10(se.exp),y=h2)) + geom_point() + xlab(expression(paste("log"[10],"(SE expression raw counts)"))) + ylab(expression("Local h"^2)) + ggtitle("DGN-WB (" %&% ngenes %&% " genes)") + theme_bw(20)+ geom_smooth(method = "lm")
b
summary(lm(h2~se.exp,all))
##
## Call:
## lm(formula = h2 ~ se.exp, data = all)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.12283 -0.10476 -0.06223 0.04315 0.82021
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.228e-01 1.355e-03 90.641 <2e-16 ***
## se.exp -1.017e-05 4.057e-06 -2.508 0.0122 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1559 on 13623 degrees of freedom
## Multiple R-squared: 0.0004615, Adjusted R-squared: 0.0003881
## F-statistic: 6.289 on 1 and 13623 DF, p-value: 0.01216
tiff(filename=fig.dir %&% "FigSx-h2_v_exp_rawcounts.tiff",width=960,height=480)
multiplot(a,b,cols=2)
dev.off()
## quartz_off_screen
## 2
png(filename=fig.dir %&% "FigSx-h2_v_exp_rawcounts.png",width=960,height=480)
multiplot(a,b,cols=2)
dev.off()
## quartz_off_screen
## 2
###only include genes with h2>0.1
filall<-filter(all,h2>0.1)
ngenes<-dim(filall)[[1]]
a<-ggplot(filall,aes(x=log10(mean.exp),y=h2)) + geom_point() + xlab(expression(paste("log"[10],"(mean expression raw counts)"))) + ylab(expression("Local h"^2)) + ggtitle("DGN-WB (" %&% ngenes %&% " h2 > 0.1 genes)") + theme_bw(20)+ geom_smooth(method = "lm")
a
summary(lm(h2~mean.exp,filall))
##
## Call:
## lm(formula = h2 ~ mean.exp, data = filall)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.17465 -0.12825 -0.05403 0.08148 0.66783
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.748e-01 2.409e-03 114.102 <2e-16 ***
## mean.exp -1.705e-07 2.345e-07 -0.727 0.467
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1652 on 5055 degrees of freedom
## Multiple R-squared: 0.0001046, Adjusted R-squared: -9.321e-05
## F-statistic: 0.5288 on 1 and 5055 DF, p-value: 0.4671
b<-ggplot(filall,aes(x=log10(se.exp),y=h2)) + geom_point() + xlab(expression(paste("log"[10],"(SE expression raw counts)"))) + ylab(expression("Local h"^2)) + ggtitle("DGN-WB (" %&% ngenes %&% " h2 > 0.1 genes)") + theme_bw(20)+ geom_smooth(method = "lm")
b
summary(lm(h2~se.exp,filall))
##
## Call:
## lm(formula = h2 ~ se.exp, data = filall)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.17470 -0.12809 -0.05404 0.08182 0.66813
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.743e-01 2.396e-03 114.511 <2e-16 ***
## se.exp 6.629e-07 1.259e-05 0.053 0.958
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1652 on 5055 degrees of freedom
## Multiple R-squared: 5.487e-07, Adjusted R-squared: -0.0001973
## F-statistic: 0.002774 on 1 and 5055 DF, p-value: 0.958